# The version of R used is 3.6.1.
# Load the raw listings export; text columns are kept as character vectors.
# NOTE(review): the original comment named this file "listings_detail.csv"
# while the code opens "listing_detail.csv" -- confirm the name on disk.
df <- read.csv(
  file = "listing_detail.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)

# Keep only the rows whose three flag columns each hold "t", "f", or an empty
# string. Any other value (or NA) causes the row to be dropped; presumably
# such rows were parsed with their fields shifted into the wrong columns.
flag_values <- c("t", "f", "")
rows_ok <- df$instant_bookable %in% flag_values &
  df$host_has_profile_pic %in% flag_values &
  df$has_availability %in% flag_values
df2 <- df[rows_ok, , drop = FALSE]
dim(df2)
## [1] 25330    74
# Coerce number_of_reviews to numeric; values that cannot be parsed become NA.
df2$number_of_reviews <- as.numeric(df2$number_of_reviews)
# Retain listings with at least one review. which() skips NA comparisons, so
# rows with an unparseable review count are dropped as well.
has_reviews <- which(df2$number_of_reviews > 0)
df3 <- df2[has_reviews, , drop = FALSE]
dim(df3)
## [1] 11600    74
# Delete columns with too many missing values
# Step 1: compute, for every column of df3, the share of entries that count as
# missing. An entry is missing when it is NA, an empty string, or the literal
# text "N/A".
# The columns of df3 are iterated directly instead of looping over a
# hard-coded 1:74, so the result always has one element per column even if
# the input file gains or loses columns.
re <- vapply(
  df3,
  function(column) mean(is.na(column) | column == "" | column == "N/A"),
  numeric(1),
  USE.NAMES = FALSE
)
re
##  [1] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0002586207
##  [6] 0.0164655172 0.3248275862 0.0000000000 0.0000000000 0.0000000000
## [11] 0.0000862069 0.0000862069 0.0011206897 0.4183620690 0.5970689655
## [16] 0.5970689655 0.4132758621 0.0000862069 0.0000862069 0.0000862069
## [21] 0.3402586207 0.0000862069 0.0000862069 0.0000000000 0.0000862069
## [26] 0.0000862069 0.3246551724 0.0000000000 1.0000000000 0.0000000000
## [31] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000
## [36] 0.0006896552 0.0641379310 0.0054310345 0.0000000000 0.0000000000
## [41] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [46] 0.0000000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000
## [51] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [56] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [61] 0.0475862069 0.0507758621 0.0493965517 0.0512931034 0.0497413793
## [66] 0.0513793103 0.0515517241 1.0000000000 0.0000000000 0.0000000000
## [71] 0.0000000000 0.0000000000 0.0000000000 0.0000000000
# Drop every column in which more than 10% of the entries are missing.
# A logical mask is used instead of `-which(...)`: when no column exceeds the
# threshold, `-which(...)` evaluates to `integer(0)`, which selects zero
# columns and would silently discard the whole data set.
# drop = FALSE keeps the result a data frame even if one column remains.
too_sparse <- !is.na(re) & re > 0.1
df4 <- df3[, !too_sparse, drop = FALSE]
#Output deleted column names
names(df3)[too_sparse]
##  [1] "neighborhood_overview"        "host_about"                  
##  [3] "host_response_time"           "host_response_rate"          
##  [5] "host_acceptance_rate"         "host_neighbourhood"          
##  [7] "neighbourhood"                "neighbourhood_group_cleansed"
##  [9] "bathrooms"                    "calendar_updated"            
## [11] "license"
# Delete columns with only one level factor
# Step 1: count the distinct values in every column of df4 (NA counts as a
# value). The columns are iterated directly instead of looping over a
# hard-coded 1:63, so the vector always matches the actual width of df4.
re <- vapply(
  df4,
  function(column) length(unique(column)),
  numeric(1),
  USE.NAMES = FALSE
)
re
##  [1] 11600 11600     1     1 11456 11279 11532  9521  9521  3727  2873   835
## [13]     3  9496  9496    61    61   278     3     3    38  8069  8679    62
## [25]     4    16    30    11    19 11041   647    57   156    56    62   146
## [37]   147   184   203     1    31    61    91   365     1   282    78    16
## [49]  2339  1828    46    10    10    10    10     9    10     2    51    47
## [61]    19    10   537
# Drop columns that hold at most one distinct value; they carry no
# information. A logical mask replaces `-which(...)`, which would select zero
# columns (dropping everything) when no column is constant.
# drop = FALSE keeps the result a data frame even if one column remains.
constant_cols <- re <= 1
df5 <- df4[, !constant_cols, drop = FALSE]
#Output deleted column names
names(df4)[constant_cols]
## [1] "scrape_id"             "last_scraped"          "has_availability"     
## [4] "calendar_last_scraped"
# Columns treated as uninformative (URLs, host identifiers, dates); removed
# by name.
del <- c(
  "listing_url", "picture_url", "host_id", "host_url", "host_name",
  "host_since", "host_thumbnail_url", "host_picture_url",
  "first_review", "last_review"
)
# Keep every column whose name has no match in `del`.
df6 <- df5[, is.na(match(names(df5), del))]


# Text columns that go through text analysis:
#   amenities, property_type, host_verifications, name, description,
#   host_location
# The columns are picked by position in df6 (2, 3, 4, 8, 14, 20); presumably
# these positions are the six columns listed above -- verify if the input
# layout changes.
# Merge them row by row, separated by single spaces. The fold starts from an
# empty string, so every merged text begins with a leading space.
a <- Reduce(
  function(joined, column) paste(joined, column, sep = " "),
  df6[c(2, 3, 4, 8, 14, 20)],
  character(nrow(df6))
)
#Text information processing
library(jiebaRD)
## Warning: package 'jiebaRD' was built under R version 3.6.3
library(jiebaR)
## Warning: package 'jiebaR' was built under R version 3.6.3
library(NLP)
## Warning: package 'NLP' was built under R version 3.6.3
library(tm)
## Warning: package 'tm' was built under R version 3.6.3
# Split each merged text into words with a jiebaR worker created with
# byline = TRUE.
keyword <- segment(a, worker(byline = TRUE))

# Wrap the segmented texts in a tm corpus.
Thesaurus <- VCorpus(VectorSource(keyword))

# Build the document-term frequency matrix from the corpus.
keywordmatrix <- DocumentTermMatrix(Thesaurus)

# Remove sparse terms using a sparsity threshold of 0.95.
keywordmatrix2 <- removeSparseTerms(keywordmatrix, sparse = 0.95)

# Convert to an ordinary matrix so it can be column-bound to the data frame
# later in the script.
keywordmarix3 <- as.matrix(keywordmatrix2)
dim(keywordmarix3)
## [1] 11600   390
# Remove the text columns (positions 2, 3, 4, 8, 14, 20) that were merged for
# text analysis, keeping every other column in its original order.
df7 <- df6[, setdiff(seq_len(ncol(df6)), c(2, 3, 4, 8, 14, 20))]


# bathrooms_text is free text and needs special handling; list its distinct
# values first to see which formats must be parsed.
unique(df7[["bathrooms_text"]])
##  [1] "1 shared bath"    "3 baths"          "1 private bath"   "1 bath"          
##  [5] "2.5 baths"        "1.5 shared baths" "2.5 shared baths" "2 baths"         
##  [9] "0 shared baths"   "1.5 baths"        ""                 "0 baths"         
## [13] "2 shared baths"   "5 baths"          "Half-bath"        "4 baths"         
## [17] "3.5 baths"        "4.5 baths"        "6 baths"          "Shared half-bath"
## [21] "5.5 baths"        "3 shared baths"   "3.5 shared baths" "8 shared baths"  
## [25] "6.5 baths"        "4 shared baths"   "4.5 shared baths" "6 shared baths"  
## [29] "18 baths"         "18 shared baths"
library(stringr)
## Warning: package 'stringr' was built under R version 3.6.3
# Numeric bathroom count: the first number found in the text, e.g.
# "2.5 shared baths" -> 2.5 and "18 baths" -> 18.
# The pattern `\\d+(\\.\\d+)?` accepts any number of digits before and after
# the decimal point. The previous pattern allowed only one digit after an
# optional dot, so a value such as "10.5 baths" was truncated to 10.
# NOTE(review): entries without digits ("Half-bath", "Shared half-bath", "")
# still yield NA here and are removed by the later na.omit() -- decide whether
# half-baths should map to 0.5 instead.
df7$bathsum <- as.numeric(str_extract(df7$bathrooms_text, "\\d+(\\.\\d+)?"))
# 1 when the text mentions a shared bathroom, 0 otherwise. The match ignores
# case so that "Shared half-bath" is recognised as shared too.
df7$bathclass <- ifelse(
  str_detect(df7$bathrooms_text, regex("share", ignore_case = TRUE)),
  1,
  0
)
# Strip the "$" sign and "," separators so price parses as a number.
df7$price <- as.numeric(str_replace_all(df7$price, "\\$|,", ""))
#Delete the bathrooms_text column
# Removed by name rather than by position (12) so the step does not silently
# drop a different column if the layout changes.
df8 <- df7[, names(df7) != "bathrooms_text", drop = FALSE]


# Reorder the columns so that the ones converted to factors sit together in
# positions 2-7, the ones converted to numeric follow in positions 8-41, and
# original column 27 moves to the end.
df8 <- df8[, c(
  1, 2, 5:7, 37, 10,
  3:4, 8:9, 11:26, 28:36, 38:44,
  27
)]
# Columns 2-7 become factors.
df8[2:7] <- lapply(df8[2:7], as.factor)
# Columns 8-41 become numeric.
df8[8:41] <- lapply(df8[8:41], as.numeric)
# Append the word-frequency matrix to the data set, row for row.
df9 <- cbind(df8, keywordmarix3)
# Preview the first 100 documents and the first 6 terms.
keywordmarix3[seq_len(100), seq_len(6)]
##      Terms
## Docs  access across after aid air airport
##   1        0      0     0   0   0       0
##   2        0      0     0   1   1       0
##   3        0      0     1   0   1       0
##   4        0      0     0   1   1       0
##   5        1      0     0   1   1       0
##   6        1      0     1   0   0       0
##   7        0      0     0   0   0       0
##   8        0      0     0   0   0       0
##   9        0      0     0   0   0       0
##   10       1      0     0   1   0       0
##   11       1      0     0   0   0       0
##   12       1      0     0   0   0       0
##   13       1      0     0   1   1       1
##   14       0      0     0   0   1       2
##   15       1      0     0   1   0       0
##   16       0      0     0   0   0       0
##   17       2      0     0   0   0       0
##   18       0      0     0   1   1       0
##   19       2      1     0   0   1       0
##   20       0      0     0   1   0       0
##   21       0      0     0   0   1       0
##   22       0      0     0   0   0       0
##   23       0      0     0   0   1       0
##   24       0      0     0   0   1       0
##   25       0      1     0   0   0       0
##   26       0      0     0   0   0       0
##   27       0      0     0   0   0       0
##   28       0      0     0   0   2       0
##   29       2      0     0   1   1       0
##   30       1      0     0   1   0       0
##   31       0      0     1   1   0       0
##   32       2      0     0   0   0       0
##   33       0      0     0   1   2       0
##   34       0      0     0   0   0       0
##   35       0      0     0   0   1       0
##   36       0      0     2   0   0       0
##   37       0      0     0   0   1       0
##   38       0      0     0   0   0       0
##   39       0      0     0   0   0       0
##   40       0      0     0   1   0       0
##   41       1      0     0   0   2       0
##   42       0      0     0   0   0       0
##   43       1      0     1   0   0       0
##   44       0      0     0   1   0       0
##   45       0      0     0   0   0       0
##   46       0      0     0   1   1       0
##   47       0      0     0   1   0       0
##   48       0      0     0   1   2       0
##   49       0      0     0   0   2       0
##   50       0      0     0   1   0       0
##   51       0      0     0   0   0       0
##   52       0      1     0   0   0       0
##   53       0      0     0   0   1       0
##   54       0      0     0   0   2       0
##   55       0      0     0   1   0       1
##   56       0      0     0   0   2       0
##   57       0      0     0   1   1       0
##   58       0      0     0   0   3       0
##   59       0      0     0   0   0       0
##   60       0      0     0   0   0       0
##   61       0      0     0   0   0       0
##   62       0      0     0   1   0       0
##   63       0      0     0   1   1       0
##   64       0      0     0   0   1       0
##   65       1      0     0   0   0       0
##   66       0      0     0   0   2       0
##   67       0      0     0   1   0       1
##   68       0      0     0   1   0       0
##   69       0      0     0   0   2       0
##   70       1      0     0   0   0       0
##   71       0      0     0   1   0       0
##   72       0      0     0   0   0       0
##   73       0      0     0   0   1       0
##   74       0      0     0   0   0       0
##   75       0      0     0   1   0       0
##   76       1      0     1   0   0       0
##   77       1      0     0   1   0       0
##   78       0      0     0   0   0       0
##   79       0      0     1   0   1       0
##   80       0      0     0   0   0       0
##   81       0      0     0   0   0       0
##   82       0      0     0   0   0       0
##   83       1      0     0   0   1       0
##   84       0      0     0   0   1       0
##   85       0      0     0   0   2       0
##   86       0      0     0   0   0       0
##   87       0      0     0   0   0       0
##   88       0      0     0   0   0       0
##   89       0      0     1   0   1       0
##   90       2      0     0   0   0       0
##   91       0      0     1   0   1       0
##   92       0      0     1   0   1       0
##   93       2      0     0   1   1       1
##   94       0      1     0   0   1       1
##   95       0      0     0   0   0       0
##   96       0      0     0   0   0       0
##   97       2      0     0   0   1       0
##   98       0      0     0   1   2       0
##   99       0      0     0   0   1       0
##   100      0      0     0   0   1       0
#Delete lines with missing values
df10 <- na.omit(df9)
dim(df10)
## [1] 10220   434
# Sort listings from most to fewest reviews.
df10 <- df10[order(-df10$number_of_reviews), ]
# Replace the review count with a binary label: 1 for the first 100 rows
# (the most-reviewed listings), 0 for all others. The label is derived from
# nrow(df10) instead of the hard-coded rep(c(1, 0), c(100, 10120)), which
# fails as soon as the cleaned data does not have exactly 10220 rows.
df10$number_of_reviews <- as.numeric(seq_len(nrow(df10)) <= 100)
# Rename the label column by name rather than by position (44), so the rename
# cannot hit the wrong column if the column layout changes.
names(df10)[match("number_of_reviews", names(df10))] <- "istop100"
# Shuffle the row order before writing.
# NOTE(review): no seed is set, so the row order differs between runs.
df10 <- df10[sample(nrow(df10), nrow(df10)), ]
write.csv(df10, "cleaned_data.csv", row.names = FALSE)


#Read cleaned_data.csv file
# stringsAsFactors is set explicitly. The script was written for R 3.6.1,
# where read.csv() converted text columns to factors by default; from R 4.0
# the default is character, which would change the factor summaries printed
# below.
df <- read.csv("cleaned_data.csv", stringsAsFactors = TRUE)
dim(df)
## [1] 10220   434
summary(df)
##        id           host_is_superhost host_has_profile_pic
##  Min.   :   11156   f:8429            f:   18             
##  1st Qu.: 9686147   t:1791            t:10202             
##  Median :21470862                                         
##  Mean   :21386240                                         
##  3rd Qu.:31805768                                         
##  Max.   :46128796                                         
##                                                           
##  host_identity_verified neighbourhood_cleansed instant_bookable
##  f:2053                 Sydney   :2540         f:6341          
##  t:8167                 Waverley :1464         t:3879          
##                         Randwick : 876                         
##                         Warringah: 547                         
##                         Manly    : 507                         
##                         Woollahra: 440                         
##                         (Other)  :3846                         
##            room_type    host_listings_count host_total_listings_count
##  Entire home/apt:6658   Min.   :  0.000     Min.   :  0.000          
##  Hotel room     :  65   1st Qu.:  1.000     1st Qu.:  1.000          
##  Private room   :3384   Median :  1.000     Median :  1.000          
##  Shared room    : 113   Mean   :  6.201     Mean   :  6.201          
##                         3rd Qu.:  2.000     3rd Qu.:  2.000          
##                         Max.   :225.000     Max.   :225.000          
##                                                                      
##     latitude        longitude      accommodates       bedrooms     
##  Min.   :-34.10   Min.   :150.7   Min.   : 1.000   Min.   : 1.000  
##  1st Qu.:-33.90   1st Qu.:151.2   1st Qu.: 2.000   1st Qu.: 1.000  
##  Median :-33.88   Median :151.2   Median : 2.000   Median : 1.000  
##  Mean   :-33.86   Mean   :151.2   Mean   : 3.482   Mean   : 1.688  
##  3rd Qu.:-33.83   3rd Qu.:151.3   3rd Qu.: 4.000   3rd Qu.: 2.000  
##  Max.   :-33.40   Max.   :151.3   Max.   :16.000   Max.   :20.000  
##                                                                    
##       beds            price         minimum_nights    maximum_nights  
##  Min.   : 0.000   Min.   :   11.0   Min.   :  1.000   Min.   :   1.0  
##  1st Qu.: 1.000   1st Qu.:   78.0   1st Qu.:  1.000   1st Qu.:  30.0  
##  Median : 1.000   Median :  126.0   Median :  2.000   Median :1125.0  
##  Mean   : 2.024   Mean   :  199.8   Mean   :  5.307   Mean   : 667.4  
##  3rd Qu.: 3.000   3rd Qu.:  215.2   3rd Qu.:  5.000   3rd Qu.:1125.0  
##  Max.   :19.000   Max.   :28613.0   Max.   :500.000   Max.   :1825.0  
##                                                                       
##  minimum_minimum_nights maximum_minimum_nights minimum_maximum_nights
##  Min.   :  1.000        Min.   :  1.000        Min.   :1.000e+00     
##  1st Qu.:  1.000        1st Qu.:  2.000        1st Qu.:4.500e+01     
##  Median :  2.000        Median :  3.000        Median :1.125e+03     
##  Mean   :  4.986        Mean   :  5.831        Mean   :8.413e+05     
##  3rd Qu.:  4.000        3rd Qu.:  5.000        3rd Qu.:1.125e+03     
##  Max.   :500.000        Max.   :500.000        Max.   :2.147e+09     
##                                                                      
##  maximum_maximum_nights minimum_nights_avg_ntm maximum_nights_avg_ntm
##  Min.   :1.000e+00      Min.   :  1.000        Min.   :1.000e+00     
##  1st Qu.:4.700e+01      1st Qu.:  1.975        1st Qu.:4.500e+01     
##  Median :1.125e+03      Median :  2.100        Median :1.125e+03     
##  Mean   :8.413e+05      Mean   :  5.389        Mean   :8.413e+05     
##  3rd Qu.:1.125e+03      3rd Qu.:  5.000        3rd Qu.:1.125e+03     
##  Max.   :2.147e+09      Max.   :500.000        Max.   :2.147e+09     
##                                                                      
##  availability_30  availability_60 availability_90 availability_365
##  Min.   : 0.000   Min.   : 0.00   Min.   : 0.00   Min.   :  0.0   
##  1st Qu.: 0.000   1st Qu.: 0.00   1st Qu.: 0.00   1st Qu.:  0.0   
##  Median : 0.000   Median : 0.00   Median : 0.00   Median :  0.0   
##  Mean   : 7.434   Mean   :15.92   Mean   :25.95   Mean   : 94.3   
##  3rd Qu.:17.000   3rd Qu.:35.00   3rd Qu.:59.00   3rd Qu.:179.0   
##  Max.   :30.000   Max.   :60.00   Max.   :90.00   Max.   :365.0   
##                                                                   
##  number_of_reviews_ltm number_of_reviews_l30d review_scores_rating
##  Min.   :  0.000       Min.   : 0.0000        Min.   : 20.00      
##  1st Qu.:  0.000       1st Qu.: 0.0000        1st Qu.: 91.00      
##  Median :  1.000       Median : 0.0000        Median : 97.00      
##  Mean   :  4.071       Mean   : 0.1954        Mean   : 93.51      
##  3rd Qu.:  4.000       3rd Qu.: 0.0000        3rd Qu.:100.00      
##  Max.   :147.000       Max.   :29.0000        Max.   :100.00      
##                                                                   
##  review_scores_accuracy review_scores_cleanliness review_scores_checkin
##  Min.   : 2.000         Min.   : 2.000            Min.   : 2.000       
##  1st Qu.: 9.000         1st Qu.: 9.000            1st Qu.:10.000       
##  Median :10.000         Median :10.000            Median :10.000       
##  Mean   : 9.551         Mean   : 9.219            Mean   : 9.729       
##  3rd Qu.:10.000         3rd Qu.:10.000            3rd Qu.:10.000       
##  Max.   :10.000         Max.   :10.000            Max.   :10.000       
##                                                                        
##  review_scores_communication review_scores_location review_scores_value
##  Min.   : 2.000              Min.   : 2.000         Min.   : 2.00      
##  1st Qu.:10.000              1st Qu.:10.000         1st Qu.: 9.00      
##  Median :10.000              Median :10.000         Median :10.00      
##  Mean   : 9.733              Mean   : 9.702         Mean   : 9.33      
##  3rd Qu.:10.000              3rd Qu.:10.000         3rd Qu.:10.00      
##  Max.   :10.000              Max.   :10.000         Max.   :10.00      
##                                                                        
##  calculated_host_listings_count calculated_host_listings_count_entire_homes
##  Min.   :  1.000                Min.   :  0.00                             
##  1st Qu.:  1.000                1st Qu.:  0.00                             
##  Median :  1.000                Median :  1.00                             
##  Mean   :  5.224                Mean   :  4.23                             
##  3rd Qu.:  2.000                3rd Qu.:  1.00                             
##  Max.   :136.000                Max.   :136.00                             
##                                                                            
##  calculated_host_listings_count_private_rooms
##  Min.   : 0.0000                             
##  1st Qu.: 0.0000                             
##  Median : 0.0000                             
##  Mean   : 0.8863                             
##  3rd Qu.: 1.0000                             
##  Max.   :77.0000                             
##                                              
##  calculated_host_listings_count_shared_rooms reviews_per_month    bathsum      
##  Min.   : 0.00000                            Min.   : 0.0100   Min.   : 0.000  
##  1st Qu.: 0.00000                            1st Qu.: 0.0700   1st Qu.: 1.000  
##  Median : 0.00000                            Median : 0.2000   Median : 1.000  
##  Mean   : 0.06477                            Mean   : 0.6241   Mean   : 1.368  
##  3rd Qu.: 0.00000                            3rd Qu.: 0.7600   3rd Qu.: 1.500  
##  Max.   :17.00000                            Max.   :11.0600   Max.   :18.000  
##                                                                                
##    bathclass         istop100            access           across       
##  Min.   :0.0000   Min.   :0.000000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.000000   Median :0.5000   Median :0.00000  
##  Mean   :0.1909   Mean   :0.009785   Mean   :0.7841   Mean   :0.06556  
##  3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.000000   Max.   :6.0000   Max.   :3.00000  
##                                                                        
##      after              aid              air            airport      
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.07035   Mean   :0.3409   Mean   :0.6273   Mean   :0.1136  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :3.00000   Max.   :3.0000   Max.   :5.0000   Max.   :6.0000  
##                                                                      
##       airy             alarm            all            allowed      
##  Min.   :0.00000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :1.000   Median :0.0000   Median :0.0000  
##  Mean   :0.06487   Mean   :1.151   Mean   :0.5242   Mean   :0.4587  
##  3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :3.00000   Max.   :3.000   Max.   :6.0000   Max.   :3.0000  
##                                                                     
##       also           amazing          amenities            and        
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.: 4.000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median : 6.000  
##  Mean   :0.2651   Mean   :0.07759   Mean   :0.08982   Mean   : 6.028  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.: 8.000  
##  Max.   :6.0000   Max.   :6.00000   Max.   :3.00000   Max.   :19.000  
##                                                                       
##       any            apartment        appliances           are        
##  Min.   :0.00000   Min.   : 0.000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.: 0.000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.00000   Median : 2.000   Median :0.00000   Median :0.0000  
##  Mean   :0.06125   Mean   : 1.855   Mean   :0.07906   Mean   :0.6304  
##  3rd Qu.:0.00000   3rd Qu.: 3.000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :4.00000   Max.   :10.000   Max.   :3.00000   Max.   :9.0000  
##                                                                       
##       area            areas           around             art         
##  Min.   :0.0000   Min.   :0.000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.000   Median :0.00000   Median :0.00000  
##  Mean   :0.4422   Mean   :0.118   Mean   :0.08933   Mean   :0.08239  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :7.0000   Max.   :3.000   Max.   :4.00000   Max.   :5.00000  
##                                                                      
##    australia        available           away             back       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.8751   Mean   :0.2576   Mean   :0.2644   Mean   :0.1049  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :3.0000   Max.   :5.0000   Max.   :5.0000   Max.   :5.0000  
##                                                                     
##     backyard         balcony            bars            basics      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2926   Mean   :0.6933   Mean   :0.1434   Mean   :0.4524  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :5.0000   Max.   :7.0000   Max.   :3.0000   Max.   :2.0000  
##                                                                     
##       bath           bathroom        bathrooms         bathtub      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1353   Mean   :0.4984   Mean   :0.1019   Mean   :0.1369  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :6.0000   Max.   :3.0000   Max.   :3.0000  
##                                                                     
##       bay              bbq             beach            beaches      
##  Min.   :0.0000   Min.   :0.0000   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median : 0.0000   Median :0.0000  
##  Mean   :0.1233   Mean   :0.3078   Mean   : 0.9493   Mean   :0.1593  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.: 2.0000   3rd Qu.:0.0000  
##  Max.   :7.0000   Max.   :4.0000   Max.   :14.0000   Max.   :6.0000  
##                                                                      
##    beautiful           bed            bedroom        bedrooms.1    
##  Min.   :0.0000   Min.   : 0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 0.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.0000   Median : 1.000   Median :1.000   Median :0.0000  
##  Mean   :0.3021   Mean   : 1.073   Mean   :1.101   Mean   :0.2669  
##  3rd Qu.:0.0000   3rd Qu.: 2.000   3rd Qu.:2.000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :10.000   Max.   :9.000   Max.   :5.0000  
##                                                                    
##      beds.1            been             before             best       
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.1065   Mean   :0.05724   Mean   :0.07153   Mean   :0.1273  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :3.00000   Max.   :4.00000   Max.   :4.0000  
##                                                                       
##       big            blankets          block             bondi       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.0954   Mean   :0.2869   Mean   :0.06859   Mean   :0.5107  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :3.0000   Max.   :3.00000   Max.   :9.0000  
##                                                                      
##      books             both             brand          breakfast     
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.1108   Mean   :0.09393   Mean   :0.1019   Mean   :0.1875  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :3.0000   Max.   :5.00000   Max.   :9.0000   Max.   :6.0000  
##                                                                      
##      bright          building         built            bus        
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :0.000   Median :0.0000  
##  Mean   :0.1538   Mean   :0.186   Mean   :0.125   Mean   :0.2858  
##  3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :5.000   Max.   :4.000   Max.   :8.0000  
##                                                                   
##      buses            business            but             cable      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.000  
##  Mean   :0.08043   Mean   :0.07427   Mean   :0.1527   Mean   :0.195  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.000  
##  Max.   :4.00000   Max.   :3.00000   Max.   :4.0000   Max.   :5.000  
##                                                                      
##       cafe             cafes            can              car         
##  Min.   :0.00000   Min.   :0.000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000   Median :0.0000   Median :0.00000  
##  Mean   :0.05675   Mean   :0.289   Mean   :0.3361   Mean   :0.09119  
##  3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :3.000   Max.   :6.0000   Max.   :3.00000  
##                                                                      
##      carbon            cbd            central           centre      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2352   Mean   :0.2724   Mean   :0.1474   Mean   :0.1303  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :5.0000   Max.   :5.0000   Max.   :5.0000  
##                                                                     
##      chair           children           city            clean       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1091   Mean   :0.1867   Mean   :0.5178   Mean   :0.1442  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :6.0000   Max.   :7.0000   Max.   :5.0000  
##                                                                     
##     cleaning           close            coffee        comfortable    
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.06585   Mean   :0.3912   Mean   :0.3832   Mean   :0.2291  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :4.00000   Max.   :6.0000   Max.   :6.0000   Max.   :4.0000  
##                                                                      
##      comfy          conditioning      connection        convenient     
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.06203   Mean   :0.5491   Mean   :0.06213   Mean   :0.07769  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :4.00000   Max.   :4.0000   Max.   :2.00000   Max.   :3.00000  
##                                                                        
##      coogee          cooking            cosy             couple      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.1045   Mean   :0.5037   Mean   :0.08415   Mean   :0.0682  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :7.0000   Max.   :4.0000   Max.   :4.00000   Max.   :6.0000  
##                                                                      
##     couples          courtyard            crib          darkening      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.07847   Mean   :0.08845   Mean   :0.1211   Mean   :0.08072  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :2.00000   Max.   :4.00000   Max.   :4.0000   Max.   :1.00000  
##                                                                        
##     darling             day               deck             dining      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.08004   Mean   :0.09961   Mean   :0.07652   Mean   :0.2669  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :9.00000   Max.   :4.00000   Max.   :5.00000   Max.   :5.0000  
##                                                                        
##      dishes         dishwasher        distance          door       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.000   Median :0.0000  
##  Mean   :0.5016   Mean   :0.4085   Mean   :0.151   Mean   :0.2169  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:0.0000  
##  Max.   :3.0000   Max.   :3.0000   Max.   :5.000   Max.   :4.0000  
##                                                                    
##      double            down           downstairs          drive        
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.2556   Mean   :0.05362   Mean   :0.06546   Mean   :0.09119  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :6.0000   Max.   :3.00000   Max.   :3.00000   Max.   :6.00000  
##                                                                        
##     dropoff           dryer            easy           elevator     
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :2.000   Median :0.0000   Median :0.0000  
##  Mean   :0.1801   Mean   :1.456   Mean   :0.1423   Mean   :0.2532  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :4.000   Max.   :4.0000   Max.   :3.0000  
##                                                                    
##      email           enjoy           ensuite            entire    
##  Min.   :0.000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00  
##  Median :1.000   Median :0.0000   Median :0.00000   Median :1.00  
##  Mean   :1.113   Mean   :0.2552   Mean   :0.09863   Mean   :0.74  
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.00  
##  Max.   :4.000   Max.   :6.0000   Max.   :5.00000   Max.   :4.00  
##                                                                   
##     entrance         equipped       essentials         etc         
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:1.000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.000   Median :1.000   Median :0.00000  
##  Mean   :0.3307   Mean   :0.196   Mean   :1.012   Mean   :0.06135  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:1.000   3rd Qu.:0.00000  
##  Max.   :4.0000   Max.   :5.000   Max.   :4.000   Max.   :5.00000  
##                                                                    
##     ethernet         everything      extinguisher        extra       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.05616   Mean   :0.1521   Mean   :0.3242   Mean   :0.3367  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :2.00000   Max.   :4.0000   Max.   :1.0000   Max.   :4.0000  
##                                                                      
##     facebook        facilities        families           family      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.1984   Mean   :0.1179   Mean   :0.06761   Mean   :0.2265  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :4.0000   Max.   :3.00000   Max.   :7.0000  
##                                                                      
##      famous           features           feel            ferry        
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.08405   Mean   :0.1023   Mean   :0.0955   Mean   :0.07916  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :4.0000   Max.   :5.0000   Max.   :5.00000  
##                                                                       
##       few              filled            fire          fireplace      
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.05949   Mean   :0.1114   Mean   :0.3356   Mean   :0.09227  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :4.0000   Max.   :3.0000   Max.   :4.00000  
##                                                                       
##      first             flat            floor            for.      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :0.000   Median :1.000  
##  Mean   :0.3821   Mean   :0.1078   Mean   :0.224   Mean   :1.192  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:2.000  
##  Max.   :7.0000   Max.   :6.0000   Max.   :6.000   Max.   :9.000  
##                                                                   
##       free           fridge           friendly          from        
##  Min.   : 0.00   Min.   :0.00000   Min.   :0.000   Min.   : 0.0000  
##  1st Qu.: 0.00   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.: 0.0000  
##  Median : 1.00   Median :0.00000   Median :1.000   Median : 0.0000  
##  Mean   : 1.02   Mean   :0.09736   Mean   :0.772   Mean   : 0.7746  
##  3rd Qu.: 2.00   3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.: 1.0000  
##  Max.   :10.00   Max.   :3.00000   Max.   :5.000   Max.   :12.0000  
##                                                                     
##      front             full            fully          furnished     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1154   Mean   :0.1728   Mean   :0.2789   Mean   :0.1469  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :5.0000   Max.   :6.0000   Max.   :4.0000  
##                                                                     
##      garden             gas               gel               get         
##  Min.   : 0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median : 0.0000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   : 0.4385   Mean   :0.07857   Mean   :0.09159   Mean   :0.07143  
##  3rd Qu.: 1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :10.0000   Max.   :4.00000   Max.   :2.00000   Max.   :4.00000  
##                                                                         
##       good           government        great            greets      
##  Min.   :0.00000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :1.000   Median :0.0000   Median :0.0000  
##  Mean   :0.08943   Mean   :1.277   Mean   :0.2699   Mean   :0.1464  
##  3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :4.00000   Max.   :3.000   Max.   :5.0000   Max.   :2.0000  
##                                                                     
##      grill            ground            guest           guests      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.000   Median :0.0000  
##  Mean   :0.1503   Mean   :0.06468   Mean   :0.507   Mean   :0.2324  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :4.00000   Max.   :7.000   Max.   :7.0000  
##                                                                     
##       gym              hair          hangers          harbour       
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:1.0000   1st Qu.: 0.0000  
##  Median :0.0000   Median :1.000   Median :1.0000   Median : 0.0000  
##  Mean   :0.1838   Mean   :0.713   Mean   :0.8125   Mean   : 0.2337  
##  3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.: 0.0000  
##  Max.   :5.0000   Max.   :3.000   Max.   :3.0000   Max.   :11.0000  
##                                                                     
##       has              have             heart           heating      
##  Min.   :0.0000   Min.   : 0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median : 0.0000   Median :0.0000   Median :1.0000  
##  Mean   :0.5863   Mean   : 0.5109   Mean   :0.1796   Mean   :0.6841  
##  3rd Qu.:1.0000   3rd Qu.: 1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :8.0000   Max.   :10.0000   Max.   :3.0000   Max.   :4.0000  
##                                                                      
##       high            hills            holiday             home        
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.: 0.0000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median : 0.0000  
##  Mean   :0.2105   Mean   :0.09393   Mean   :0.07808   Mean   : 0.6689  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.: 1.0000  
##  Max.   :4.0000   Max.   :5.00000   Max.   :4.00000   Max.   :12.0000  
##                                                                        
##       host             hot            hotel            house        
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.: 0.0000  
##  Median :0.0000   Median :1.000   Median :0.0000   Median : 0.0000  
##  Mean   :0.1703   Mean   :0.726   Mean   :0.0636   Mean   : 0.9113  
##  3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.: 1.0000  
##  Max.   :3.0000   Max.   :5.000   Max.   :6.0000   Max.   :10.0000  
##                                                                     
##       huge             ideal            identity         includes     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.07387   Mean   :0.07045   Mean   :0.2944   Mean   :0.0638  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :5.00000   Max.   :3.00000   Max.   :1.0000   Max.   :4.0000  
##                                                                       
##    including         indoor          internal          internet      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.123   Mean   :0.1161   Mean   :0.06556   Mean   :0.07877  
##  3rd Qu.:0.000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :4.000   Max.   :4.0000   Max.   :3.00000   Max.   :3.00000  
##                                                                      
##       into              iron             its             jumio       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :1.0000   Median :0.0000   Median :1.0000  
##  Mean   :0.09119   Mean   :0.7967   Mean   :0.1034   Mean   :0.6832  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :5.00000   Max.   :3.0000   Max.   :5.0000   Max.   :1.0000  
##                                                                      
##     junction            just             king            kit        
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.000   Median :0.0000  
##  Mean   :0.08415   Mean   :0.2632   Mean   :0.127   Mean   :0.3418  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:1.0000  
##  Max.   :5.00000   Max.   :6.0000   Max.   :5.000   Max.   :4.0000  
##                                                                     
##     kitchen          laptop           large           laundry      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :2.000   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :1.633   Mean   :0.6645   Mean   :0.4742   Mean   :0.2444  
##  3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :6.000   Max.   :2.0000   Max.   :7.0000   Max.   :5.0000  
##                                                                    
##      leafy             level            light             like        
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.08033   Mean   :0.2143   Mean   :0.2474   Mean   :0.09775  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :5.0000   Max.   :5.0000   Max.   :4.00000  
##                                                                       
##      linen             linens           living           local       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.09002   Mean   :0.4167   Mean   :0.5711   Mean   :0.1132  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :3.00000   Max.   :3.0000   Max.   :6.0000   Max.   :4.0000  
##                                                                      
##     located          location          lock           lockbox      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :0.0000   Median :0.0000  
##  Mean   :0.3774   Mean   :0.291   Mean   :0.1441   Mean   :0.1956  
##  3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :7.000   Max.   :4.0000   Max.   :3.0000  
##                                                                    
##       long           looking             lots             lounge      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.3226   Mean   :0.06321   Mean   :0.05998   Mean   :0.1918  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :4.00000   Max.   :4.00000   Max.   :6.0000  
##                                                                       
##       love             lovely          luggage           luxury       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.09462   Mean   :0.1286   Mean   :0.1877   Mean   :0.07358  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :4.00000   Max.   :5.0000   Max.   :2.0000   Max.   :3.00000  
##                                                                       
##     machine            main             make             maker       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.1531   Mean   :0.1226   Mean   :0.07681   Mean   :0.2652  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :5.0000   Max.   :5.0000   Max.   :4.00000   Max.   :4.0000  
##                                                                      
##      manly             manual            many            master    
##  Min.   : 0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00  
##  1st Qu.: 0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00  
##  Median : 0.0000   Median :0.0000   Median :0.0000   Median :0.00  
##  Mean   : 0.2197   Mean   :0.3267   Mean   :0.0683   Mean   :0.11  
##  3rd Qu.: 0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00  
##  Max.   :10.0000   Max.   :3.0000   Max.   :3.0000   Max.   :5.00  
##                                                                    
##    microwave           min               mins             minute      
##  Min.   :0.0000   Min.   : 0.0000   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median : 0.0000   Median : 0.0000   Median :0.0000  
##  Mean   :0.5262   Mean   : 0.2716   Mean   : 0.2573   Mean   :0.2136  
##  3rd Qu.:1.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :12.0000   Max.   :10.0000   Max.   :7.0000  
##                                                                       
##     minutes           modern          monoxide           more        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.3776   Mean   :0.3717   Mean   :0.2351   Mean   :0.07994  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :8.0000   Max.   :6.0000   Max.   :2.0000   Max.   :3.00000  
##                                                                      
##       most            natural             near              need       
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.08904   Mean   :0.06106   Mean   :0.09834   Mean   :0.1647  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :4.00000   Max.   :3.00000   Max.   :5.00000   Max.   :4.0000  
##                                                                        
##     netflix             new             newly             next.        
##  Min.   :0.00000   Min.   : 0.000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.: 1.000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median : 1.000   Median :0.00000   Median :0.00000  
##  Mean   :0.08239   Mean   : 1.091   Mean   :0.07759   Mean   :0.05959  
##  3rd Qu.:0.00000   3rd Qu.: 1.000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :13.000   Max.   :3.00000   Max.   :3.00000  
##                                                                        
##       nice             night             north             not        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.07573   Mean   :0.05793   Mean   :0.1331   Mean   :0.1335  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :5.00000   Max.   :4.00000   Max.   :7.0000   Max.   :5.0000  
##                                                                       
##       note           ocean              off             offer        
##  Min.   :0.000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.237   Mean   :0.09883   Mean   :0.1471   Mean   :0.07485  
##  3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :4.000   Max.   :6.00000   Max.   :3.0000   Max.   :3.00000  
##                                                                      
##      offers          offline            one              only       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.0771   Mean   :0.5101   Mean   :0.4055   Mean   :0.2705  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :2.0000   Max.   :8.0000   Max.   :5.0000  
##                                                                     
##       open            other             our               out        
##  Min.   :0.0000   Min.   :0.0000   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median : 0.0000   Median :0.0000  
##  Mean   :0.2236   Mean   :0.2919   Mean   : 0.4974   Mean   :0.1906  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.: 1.0000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :5.0000   Max.   :10.0000   Max.   :5.0000  
##                                                                      
##     outdoor          outside             oven             over       
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.1868   Mean   :0.07006   Mean   :0.4921   Mean   :0.1006  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :7.0000   Max.   :4.00000   Max.   :4.0000   Max.   :6.0000  
##                                                                      
##       own              pack              paid             park       
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.1854   Mean   :0.06556   Mean   :0.0998   Mean   :0.2452  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :6.0000   Max.   :2.00000   Max.   :3.0000   Max.   :9.0000  
##                                                                      
##     parking         parks             patio           peaceful      
##  Min.   :0.00   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :1.00   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :1.16   Mean   :0.06781   Mean   :0.3603   Mean   :0.06272  
##  3rd Qu.:2.00   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :8.00   Max.   :3.00000   Max.   :5.0000   Max.   :3.00000  
##                                                                     
##      people          perfect           phone          pillows      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :1.000   Median :0.0000  
##  Mean   :0.1068   Mean   :0.2652   Mean   :1.002   Mean   :0.2954  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:1.0000  
##  Max.   :5.0000   Max.   :5.0000   Max.   :3.000   Max.   :2.0000  
##                                                                    
##      place             plan             play             please      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.2826   Mean   :0.1347   Mean   :0.08268   Mean   :0.1417  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :6.0000   Max.   :5.0000   Max.   :3.00000   Max.   :5.0000  
##                                                                      
##      plenty            plus              pool           premises     
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.0000   Median :1.0000  
##  Mean   :0.1077   Mean   :0.05871   Mean   :0.4327   Mean   :0.5378  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :4.0000   Max.   :5.00000   Max.   :8.0000   Max.   :4.0000  
##                                                                      
##     private         property         provided          public      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :1.114   Mean   :0.1018   Mean   :0.1023   Mean   :0.1186  
##  3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :8.000   Max.   :5.0000   Max.   :4.0000   Max.   :3.0000  
##                                                                    
##     quality            queen            quiet         refrigerator  
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.06869   Mean   :0.3068   Mean   :0.2841   Mean   :0.503  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.000  
##  Max.   :4.00000   Max.   :5.0000   Max.   :4.0000   Max.   :3.000  
##                                                                     
##      relax            relaxing         renovated       restaurants    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.09755   Mean   :0.06223   Mean   :0.1662   Mean   :0.3567  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :3.00000   Max.   :3.00000   Max.   :4.0000   Max.   :6.0000  
##                                                                       
##     reviews           right              road             room       
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.: 0.000  
##  Median :1.0000   Median :0.00000   Median :0.0000   Median : 1.000  
##  Mean   :0.7418   Mean   :0.09599   Mean   :0.0817   Mean   : 1.416  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.: 2.000  
##  Max.   :3.0000   Max.   :4.00000   Max.   :4.0000   Max.   :11.000  
##                                                                      
##      rooms            second           secure             self        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.1108   Mean   :0.0862   Mean   :0.07613   Mean   :0.07583  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :6.0000   Max.   :4.0000   Max.   :3.00000   Max.   :4.00000  
##                                                                       
##      selfie          separate           set              shades       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.3174   Mean   :0.1107   Mean   :0.07544   Mean   :0.08112  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :2.0000   Max.   :5.0000   Max.   :3.00000   Max.   :1.00000  
##                                                                       
##     shampoo           share             shared          shopping     
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.00000   Median :0.0000   Median :0.0000  
##  Mean   :0.7204   Mean   :0.06155   Mean   :0.1475   Mean   :0.1584  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :3.0000   Max.   :6.00000   Max.   :8.0000   Max.   :4.0000  
##                                                                      
##      shops            short            shower         silverware   
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.2494   Mean   :0.1689   Mean   :0.2205   Mean   :0.497  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.000  
##  Max.   :4.0000   Max.   :4.0000   Max.   :4.0000   Max.   :2.000  
##                                                                    
##      single         situated            size            sized        
##  Min.   :0.000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.278   Mean   :0.07945   Mean   :0.1728   Mean   :0.08327  
##  3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :5.000   Max.   :3.00000   Max.   :6.0000   Max.   :5.00000  
##                                                                      
##      small            smoke             sofa             some        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.1262   Mean   :0.9178   Mean   :0.1069   Mean   :0.07045  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :5.0000   Max.   :3.0000   Max.   :5.0000   Max.   :4.00000  
##                                                                      
##      south            space          spacious         station      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.000   Median :0.0000   Median :0.0000  
##  Mean   :0.8326   Mean   :1.103   Mean   :0.3282   Mean   :0.3072  
##  3rd Qu.:1.0000   3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :6.000   Max.   :4.0000   Max.   :6.0000  
##                                                                    
##       stay            stays             stop            stove       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2445   Mean   :0.2959   Mean   :0.1023   Mean   :0.4726  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :5.0000   Max.   :6.0000   Max.   :3.0000   Max.   :5.0000  
##                                                                     
##      street           stroll            studio           stunning      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :1.0000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.6623   Mean   :0.07916   Mean   :0.09638   Mean   :0.08483  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :6.0000   Max.   :3.00000   Max.   :8.00000   Max.   :4.00000  
##                                                                        
##      style            stylish            suite             summer       
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.09912   Mean   :0.07671   Mean   :0.07309   Mean   :0.06047  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :4.00000   Max.   :4.00000   Max.   :8.00000   Max.   :5.00000  
##                                                                         
##       sun             sunny            surry            swimming     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.1079   Mean   :0.1313   Mean   :0.08376   Mean   :0.0681  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :6.0000   Max.   :5.00000   Max.   :5.0000  
##                                                                      
##      sydney          table             take              tea         
##  Min.   :0.000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :1.000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :1.109   Mean   :0.1314   Mean   :0.06996   Mean   :0.05323  
##  3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :9.000   Max.   :4.0000   Max.   :3.00000   Max.   :3.00000  
##                                                                      
##       term           terrace            than              that       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.3016   Mean   :0.1453   Mean   :0.06243   Mean   :0.2633  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :5.0000   Max.   :4.00000   Max.   :5.0000  
##                                                                      
##       the             there            things           this       
##  Min.   : 0.000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median : 6.000   Median :0.0000   Median :0.000   Median :0.0000  
##  Mean   : 5.997   Mean   :0.3977   Mean   :0.208   Mean   :0.6754  
##  3rd Qu.: 9.000   3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:1.0000  
##  Max.   :29.000   Max.   :7.0000   Max.   :3.000   Max.   :7.0000  
##                                                                    
##      three           throughout           time              top        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.06517   Mean   :0.06419   Mean   :0.07534   Mean   :0.1288  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :6.00000   Max.   :3.00000   Max.   :5.00000   Max.   :5.0000  
##                                                                        
##      towels             toys            train          transport     
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.09814   Mean   :0.1105   Mean   :0.2478   Mean   :0.1772  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :4.00000   Max.   :4.0000   Max.   :5.0000   Max.   :3.0000  
##                                                                      
##      travel             tub               two             u2019n       
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.08337   Mean   :0.09765   Mean   :0.3391   Mean   :0.06223  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :3.00000   Max.   :4.00000   Max.   :8.0000   Max.   :1.00000  
##                                                                        
##      u2019s            unit          unlimited           use        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1468   Mean   :0.1108   Mean   :0.0547   Mean   :0.2044  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :6.0000   Max.   :3.0000   Max.   :6.0000  
##                                                                     
##     utensils            very             view            views       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.05704   Mean   :0.2566   Mean   :0.1307   Mean   :0.3091  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.00000   Max.   :7.0000   Max.   :6.0000   Max.   :9.0000  
##                                                                      
##     village            wales             walk            walking      
##  Min.   :0.00000   Min.   :0.0000   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:1.0000   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :1.0000   Median : 0.0000   Median :0.0000  
##  Mean   :0.06712   Mean   :0.8166   Mean   : 0.7417   Mean   :0.1874  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.: 1.0000   3rd Qu.:0.0000  
##  Max.   :4.00000   Max.   :4.0000   Max.   :12.0000   Max.   :6.0000  
##                                                                       
##       want            wardrobe          washer          washing      
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :1.0000   Median :0.0000  
##  Mean   :0.05519   Mean   :0.1037   Mean   :0.9711   Mean   :0.1228  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :4.00000   Max.   :3.0000   Max.   :4.0000   Max.   :4.0000  
##                                                                      
##      water           welcome            well            where        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :1.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.6888   Mean   :0.1146   Mean   :0.1782   Mean   :0.07074  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :6.0000   Max.   :3.0000   Max.   :5.0000   Max.   :5.00000  
##                                                                      
##      which            while.             who              whole        
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.1785   Mean   :0.06448   Mean   :0.06517   Mean   :0.06732  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :4.0000   Max.   :3.00000   Max.   :3.00000   Max.   :3.00000  
##                                                                        
##       wifi            will             with            within      
##  Min.   :0.000   Min.   :0.0000   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.: 1.000   1st Qu.:0.0000  
##  Median :1.000   Median :0.0000   Median : 2.000   Median :0.0000  
##  Mean   :1.155   Mean   :0.4098   Mean   : 2.389   Mean   :0.1311  
##  3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.: 3.000   3rd Qu.:0.0000  
##  Max.   :5.000   Max.   :7.0000   Max.   :15.000   Max.   :4.0000  
##                                                                    
##       work          workspace           you              your        
##  Min.   :0.0000   Min.   :0.0000   Min.   : 0.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.: 0.000   1st Qu.: 0.0000  
##  Median :0.0000   Median :1.0000   Median : 1.000   Median : 0.0000  
##  Mean   :0.2168   Mean   :0.6635   Mean   : 1.338   Mean   : 0.5348  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.: 2.000   3rd Qu.: 1.0000  
##  Max.   :3.0000   Max.   :2.0000   Max.   :13.000   Max.   :10.0000  
## 
# Print the structure of df: one line per column with its type and first values
str(df)
## 'data.frame':    10220 obs. of  434 variables:
##  $ id                                          : int  24456644 23737008 40868452 21988667 8912495 39766755 43884965 8598248 29813566 21270682 ...
##  $ host_is_superhost                           : Factor w/ 2 levels "f","t": 1 1 1 1 1 1 1 1 1 2 ...
##  $ host_has_profile_pic                        : Factor w/ 2 levels "f","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ host_identity_verified                      : Factor w/ 2 levels "f","t": 2 2 2 1 2 2 2 2 2 2 ...
##  $ neighbourhood_cleansed                      : Factor w/ 38 levels "Ashfield","Auburn",..: 5 33 33 28 33 33 27 28 36 27 ...
##  $ instant_bookable                            : Factor w/ 2 levels "f","t": 1 2 1 2 1 2 2 2 2 1 ...
##  $ room_type                                   : Factor w/ 4 levels "Entire home/apt",..: 1 1 3 3 3 1 1 1 1 1 ...
##  $ host_listings_count                         : int  14 1 0 1 1 118 1 1 1 2 ...
##  $ host_total_listings_count                   : int  14 1 0 1 1 118 1 1 1 2 ...
##  $ latitude                                    : num  -33.9 -33.9 -33.9 -33.9 -33.9 ...
##  $ longitude                                   : num  151 151 151 151 151 ...
##  $ accommodates                                : int  4 4 2 1 1 3 2 4 3 6 ...
##  $ bedrooms                                    : int  1 2 1 1 1 1 1 1 1 3 ...
##  $ beds                                        : int  1 2 1 1 1 2 1 2 1 3 ...
##  $ price                                       : int  200 150 55 45 49 112 281 248 101 500 ...
##  $ minimum_nights                              : int  1 2 7 3 1 3 2 10 3 7 ...
##  $ maximum_nights                              : int  90 365 20 60 1125 1125 7 14 1125 1125 ...
##  $ minimum_minimum_nights                      : int  1 2 7 3 1 2 2 5 3 7 ...
##  $ maximum_minimum_nights                      : int  1 2 7 3 1 5 2 10 3 7 ...
##  $ minimum_maximum_nights                      : int  90 365 20 60 1125 1125 1125 14 1125 1125 ...
##  $ maximum_maximum_nights                      : int  90 365 20 60 1125 1125 1125 14 1125 1125 ...
##  $ minimum_nights_avg_ntm                      : num  1 2 7 3 1 3.1 2 8.6 3 7 ...
##  $ maximum_nights_avg_ntm                      : num  90 365 20 60 1125 ...
##  $ availability_30                             : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ availability_60                             : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ availability_90                             : int  0 0 0 0 0 0 2 0 0 0 ...
##  $ availability_365                            : int  0 0 0 0 0 126 2 0 0 0 ...
##  $ number_of_reviews_ltm                       : int  0 0 2 0 0 5 13 0 5 3 ...
##  $ number_of_reviews_l30d                      : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ review_scores_rating                        : int  94 96 90 90 90 92 100 98 92 98 ...
##  $ review_scores_accuracy                      : int  10 10 10 9 10 10 10 10 9 10 ...
##  $ review_scores_cleanliness                   : int  9 10 8 8 9 9 10 10 9 10 ...
##  $ review_scores_checkin                       : int  9 9 10 10 10 10 10 10 10 10 ...
##  $ review_scores_communication                 : int  9 10 10 10 10 10 10 10 10 10 ...
##  $ review_scores_location                      : int  9 10 8 8 9 10 10 10 10 10 ...
##  $ review_scores_value                         : int  9 10 9 9 9 8 10 10 9 10 ...
##  $ calculated_host_listings_count              : int  14 1 1 1 1 113 1 1 1 1 ...
##  $ calculated_host_listings_count_entire_homes : int  9 1 0 0 0 113 1 1 1 1 ...
##  $ calculated_host_listings_count_private_rooms: int  5 0 1 1 1 0 0 0 0 0 ...
##  $ calculated_host_listings_count_shared_rooms : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ reviews_per_month                           : num  1.34 1.68 0.2 0.06 0.03 0.43 3.1 0.16 2.04 0.29 ...
##  $ bathsum                                     : num  1 2 1 1 2 1 1 1 1 2 ...
##  $ bathclass                                   : int  0 0 1 1 0 0 0 0 0 0 ...
##  $ istop100                                    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ access                                      : int  1 3 0 1 0 0 0 2 2 0 ...
##  $ across                                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ after                                       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ aid                                         : int  0 1 0 0 0 1 0 0 0 1 ...
##  $ air                                         : int  0 2 0 0 2 0 2 0 0 1 ...
##  $ airport                                     : int  0 2 0 0 0 0 0 0 0 0 ...
##  $ airy                                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm                                       : int  0 1 1 1 0 2 1 1 1 1 ...
##  $ all                                         : int  0 0 2 1 0 1 0 0 0 0 ...
##  $ allowed                                     : int  1 2 0 0 0 0 0 0 0 1 ...
##  $ also                                        : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ amazing                                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ amenities                                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ and                                         : int  2 8 5 2 0 10 1 13 5 10 ...
##  $ any                                         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ apartment                                   : int  2 2 0 1 0 3 2 2 4 0 ...
##  $ appliances                                  : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ are                                         : int  0 2 1 0 0 1 0 0 0 1 ...
##  $ area                                        : int  0 1 0 0 1 0 0 1 0 0 ...
##  $ areas                                       : int  0 1 0 0 0 0 0 1 1 0 ...
##  $ around                                      : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ art                                         : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ australia                                   : int  1 1 1 1 0 1 1 1 1 0 ...
##  $ available                                   : int  0 0 1 1 0 0 0 1 0 0 ...
##  $ away                                        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ back                                        : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ backyard                                    : int  1 0 1 0 0 0 0 0 0 1 ...
##  $ balcony                                     : int  1 1 0 2 0 0 0 3 0 1 ...
##  $ bars                                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ basics                                      : int  1 1 0 0 0 1 0 1 1 1 ...
##  $ bath                                        : int  0 0 0 0 0 0 0 1 0 1 ...
##  $ bathroom                                    : int  0 0 0 0 1 1 0 2 0 0 ...
##  $ bathrooms                                   : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ bathtub                                     : int  0 1 0 0 0 1 0 0 0 0 ...
##  $ bay                                         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ bbq                                         : int  1 0 0 0 0 0 2 0 0 2 ...
##  $ beach                                       : int  0 0 0 0 0 1 2 1 6 3 ...
##  $ beaches                                     : int  0 0 0 0 0 0 1 0 0 1 ...
##  $ beautiful                                   : int  0 0 1 0 0 0 1 0 0 0 ...
##  $ bed                                         : int  1 2 1 1 1 2 1 3 1 1 ...
##  $ bedroom                                     : int  0 1 0 5 3 0 0 5 2 0 ...
##  $ bedrooms.1                                  : int  0 1 2 0 0 0 0 0 0 0 ...
##  $ beds.1                                      : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ been                                        : int  0 0 0 0 0 1 0 0 1 0 ...
##  $ before                                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ best                                        : int  0 0 0 0 0 1 0 0 0 1 ...
##  $ big                                         : int  0 0 0 2 0 0 0 0 0 0 ...
##  $ blankets                                    : int  1 1 0 1 0 0 1 0 1 1 ...
##  $ block                                       : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ bondi                                       : int  0 0 0 0 0 1 0 1 4 0 ...
##  $ books                                       : int  0 1 0 0 0 0 0 1 0 1 ...
##  $ both                                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ brand                                       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ breakfast                                   : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ bright                                      : int  0 0 1 0 0 1 0 0 0 0 ...
##   [list output truncated]
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
# Factor variables (columns 2-7): for each one, write a PNG holding a
# horizontal bar chart whose bars show the within-level proportion of
# istop100 (position = "fill"). The file is named "<index><column>.png".
for (col_idx in 2:7) {
  col_name <- names(df)[col_idx]
  png(paste0(col_idx, col_name, ".png"))
  # The fill expression is kept verbatim because ggplot2 uses it as the
  # legend title.
  bar_plot <- ggplot() +
    geom_bar(
      aes(x = df[[col_idx]], fill = as.factor(df$istop100)),
      position = "fill"
    ) +
    labs(x = col_name) +
    coord_flip()
  print(bar_plot)
  dev.off()
}
# Numeric variables (columns 8-43): for each one, write a PNG holding the
# density curves of the column, one curve per istop100 value.
# The file is named "<index>.0<column>.png".
for (col_idx in 8:43) {
  col_name <- names(df)[col_idx]
  png(paste0(col_idx, ".0", col_name, ".png"))
  # The colour expression is kept verbatim because ggplot2 uses it as the
  # legend title.
  density_plot <- ggplot() +
    geom_density(
      aes(x = df[[col_idx]], color = as.factor(df$istop100))
    ) +
    labs(x = paste(col_idx, col_name))
  print(density_plot)
  dev.off()
}
# Log-scaled numeric variables: same density plot as above, but drawn on
# log(x + 1) for a hand-picked set of columns.
# The file is named "<index>.1<column>.png".
log_cols <- c(8, 9, 13, 15, 16, 18, 19, 20, 21, 22, 23, 29, 37, 38, 39, 40, 42)
for (col_idx in log_cols) {
  col_name <- names(df)[col_idx]
  png(paste0(col_idx, ".1", col_name, ".png"))
  # The +1 shift keeps zero values finite under the logarithm.
  log_density_plot <- ggplot() +
    geom_density(
      aes(x = log(df[[col_idx]] + 1), color = as.factor(df$istop100))
    ) +
    labs(x = paste(col_idx, "log", col_name))
  print(log_density_plot)
  dev.off()
}
# Text columns (45-434): for each one, write a PNG holding a horizontal
# bar chart of the istop100 proportion at every observed value of the
# column. The file is named "<index><column>.png".
for (col_idx in 45:434) {
  col_name <- names(df)[col_idx]
  png(paste0(col_idx, col_name, ".png"))
  # The fill expression is kept verbatim because ggplot2 uses it as the
  # legend title.
  word_plot <- ggplot() +
    geom_bar(
      aes(x = df[[col_idx]], fill = as.factor(df$istop100)),
      position = "fill"
    ) +
    labs(x = paste(col_idx, col_name)) +
    coord_flip()
  print(word_plot)
  dev.off()
}


#Load drawing-related packages
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.6.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ tibble  3.0.4     √ purrr   0.3.4
## √ tidyr   1.1.2     √ dplyr   1.0.2
## √ readr   1.4.0     √ forcats 0.5.0
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::filter()     masks stats::filter()
## x purrr::flatten()    masks jsonlite::flatten()
## x dplyr::lag()        masks stats::lag()
library("jsonlite")
library("ggplot2")
library(plyr)
## Warning: package 'plyr' was built under R version 3.6.3
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
library(dplyr)
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.3
# Read the map data from neighbourhoods.geojson: join the file's lines into
# one string and parse it as JSON into nested lists (no simplification).
geoData2 <- fromJSON(
  paste(readLines("neighbourhoods.geojson", warn = FALSE), collapse = "\n"),
  simplifyVector = FALSE
)
# Split the listings by the istop100 flag for plotting on the map.
# Rows with istop100 == 0
other <- dplyr::filter(df, istop100 == 0)
# Rows with istop100 == 1
top_100 <- dplyr::filter(df, istop100 == 1)
# Draw the map: small green markers for the non-top-100 listings, larger
# red markers for the top-100 listings, then the GeoJSON layer on top.
listing_map <- leaflet()
listing_map <- setView(listing_map, lng = 151.1, lat = -33.8, zoom = 10)
listing_map <- addCircleMarkers(
  listing_map,
  lng = other$longitude, lat = other$latitude,
  radius = 2, stroke = FALSE, color = "green", fillOpacity = 0.5,
  group = "Other"
)
listing_map <- addCircleMarkers(
  listing_map,
  lng = top_100$longitude, lat = top_100$latitude,
  radius = 3, stroke = FALSE, color = "red", fillOpacity = 0.9,
  group = "Top 100"
)
listing_map <- addGeoJSON(listing_map, geoData2)
# A bare name at top level auto-prints the widget, as the original pipeline did.
listing_map
# Stratified 60/40 train/test split with random seed 123.
# Each class (istop100 == 1 and istop100 == 0) is sampled separately so
# both splits keep the class proportions.
dfistop100 <- subset(df, df$istop100 == 1)
dfnotop100 <- subset(df, df$istop100 == 0)

# Sample sizes come from the actual class sizes instead of a hard-coded
# 100/60: indexing past nrow() would silently add all-NA rows to the
# training set. With exactly 100 positive rows this is the same
# sample(100, 60) draw as before.
train_frac <- 0.6
n_top <- nrow(dfistop100)
n_other <- nrow(dfnotop100)

# The seed is reset before each draw, as in the original script.
set.seed(123)
sam1 <- sample.int(n_top, size = floor(n_top * train_frac))
set.seed(123)
sam2 <- sample.int(n_other, size = floor(n_other * train_frac))

# Sampled rows form the training set; the remaining rows form the test set.
dftrain <- rbind(dfistop100[sam1, ], dfnotop100[sam2, ])
dftest <- rbind(dfistop100[-sam1, ], dfnotop100[-sam2, ])

library(gbm)
## Warning: package 'gbm' was built under R version 3.6.3
## Loaded gbm 2.1.8
# Fit a gradient-boosted model for istop100 on the training split.
# The first column of dftrain is dropped so it is not used as a predictor.
# Arguments are written out in full (data =, cv.folds =) instead of
# relying on positional and partial argument matching.
modelgbm <- gbm(
  istop100 ~ .,
  data = dftrain[, -1],
  distribution = "bernoulli",
  interaction.depth = 6,
  shrinkage = 0.01,
  n.trees = 500,
  n.cores = 6,
  cv.folds = 3
)
# Number of trees selected from the cross-validation error.
bestntrees <- gbm.perf(modelgbm, method = "cv")

# Summarise the fitted model using the selected number of trees.
summary(modelgbm, n.trees = bestntrees)
##                                                                                       var
## reviews_per_month                                                       reviews_per_month
## neighbourhood_cleansed                                             neighbourhood_cleansed
## reviews                                                                           reviews
## number_of_reviews_ltm                                               number_of_reviews_ltm
## availability_365                                                         availability_365
## longitude                                                                       longitude
## selfie                                                                             selfie
## city                                                                                 city
## review_scores_rating                                                 review_scores_rating
## availability_90                                                           availability_90
## one                                                                                   one
## art                                                                                   art
## mins                                                                                 mins
## availability_30                                                           availability_30
## bondi                                                                               bondi
## street                                                                             street
## people                                                                             people
## price                                                                               price
## identity                                                                         identity
## the                                                                                   the
## best                                                                                 best
## bedroom                                                                           bedroom
## apartment                                                                       apartment
## availability_60                                                           availability_60
## dishwasher                                                                     dishwasher
## quiet                                                                               quiet
## calculated_host_listings_count_private_rooms calculated_host_listings_count_private_rooms
## number_of_reviews_l30d                                             number_of_reviews_l30d
## kitchen                                                                           kitchen
## guest                                                                               guest
## spacious                                                                         spacious
## government                                                                     government
## maximum_nights                                                             maximum_nights
## dryer                                                                               dryer
## backyard                                                                         backyard
## within                                                                             within
## latitude                                                                         latitude
## elevator                                                                         elevator
## bus                                                                                   bus
## darling                                                                           darling
## minimum_nights_avg_ntm                                             minimum_nights_avg_ntm
## water                                                                               water
## calculated_host_listings_count_entire_homes   calculated_host_listings_count_entire_homes
## internet                                                                         internet
## please                                                                             please
## your                                                                                 your
## living                                                                             living
## bathroom                                                                         bathroom
## all                                                                                   all
## and                                                                                   and
## maximum_minimum_nights                                             maximum_minimum_nights
## has                                                                                   has
## top                                                                                   top
## parking                                                                           parking
## just                                                                                 just
## door                                                                                 door
## washing                                                                           washing
## not                                                                                   not
## other                                                                               other
## walk                                                                                 walk
## place                                                                               place
## room                                                                                 room
## large                                                                               large
## stop                                                                                 stop
## there                                                                               there
## min                                                                                   min
## with                                                                                 with
## balcony                                                                           balcony
## cafes                                                                               cafes
## private                                                                           private
## maker                                                                               maker
## instant_bookable                                                         instant_bookable
## minimum_minimum_nights                                             minimum_minimum_nights
## heart                                                                               heart
## terrace                                                                           terrace
## access                                                                             access
## free                                                                                 free
## linens                                                                             linens
## beds                                                                                 beds
## note                                                                                 note
## conditioning                                                                 conditioning
## you                                                                                   you
## everything                                                                     everything
## for.                                                                                 for.
## house                                                                               house
## guests                                                                             guests
## alarm                                                                               alarm
## away                                                                                 away
## travel                                                                             travel
## amazing                                                                           amazing
## restaurants                                                                   restaurants
## sydney                                                                             sydney
## hot                                                                                   hot
## wifi                                                                                 wifi
## that                                                                                 that
## over                                                                                 over
## right                                                                               right
## manual                                                                             manual
## size                                                                                 size
## out                                                                                   out
## ground                                                                             ground
## great                                                                               great
## calculated_host_listings_count                             calculated_host_listings_count
## buses                                                                               buses
## host_listings_count                                                   host_listings_count
## modern                                                                             modern
## beach                                                                               beach
## accommodates                                                                 accommodates
## microwave                                                                       microwave
## very                                                                                 very
## single                                                                             single
## double                                                                             double
## offer                                                                               offer
## lockbox                                                                           lockbox
## have                                                                                 have
## king                                                                                 king
## extra                                                                               extra
## our                                                                                   our
## this                                                                                 this
## from                                                                                 from
## like                                                                                 like
## located                                                                           located
## station                                                                           station
## queen                                                                               queen
## park                                                                                 park
## premises                                                                         premises
## host_is_superhost                                                       host_is_superhost
## review_scores_cleanliness                                       review_scores_cleanliness
## rooms                                                                               rooms
## blankets                                                                         blankets
## email                                                                               email
## unlimited                                                                       unlimited
## bay                                                                                   bay
## level                                                                               level
## beaches                                                                           beaches
## are                                                                                   are
## situated                                                                         situated
## coffee                                                                             coffee
## garden                                                                             garden
## offline                                                                           offline
## beautiful                                                                       beautiful
## minimum_maximum_nights                                             minimum_maximum_nights
## paid                                                                                 paid
## aid                                                                                   aid
## allowed                                                                           allowed
## bathclass                                                                       bathclass
## new                                                                                   new
## bbq                                                                                   bbq
## dropoff                                                                           dropoff
## light                                                                               light
## harbour                                                                           harbour
## bathsum                                                                           bathsum
## patio                                                                               patio
## own                                                                                   own
## oven                                                                                 oven
## can                                                                                   can
## floor                                                                               floor
## plus                                                                                 plus
## beds.1                                                                             beds.1
## train                                                                               train
## crib                                                                                 crib
## bed                                                                                   bed
## pack                                                                                 pack
## enjoy                                                                               enjoy
## peaceful                                                                         peaceful
## extinguisher                                                                 extinguisher
## bright                                                                             bright
## refrigerator                                                                 refrigerator
## review_scores_accuracy                                             review_scores_accuracy
## which                                                                               which
## across                                                                             across
## separate                                                                         separate
## chair                                                                               chair
## tea                                                                                   tea
## front                                                                               front
## available                                                                       available
## air                                                                                   air
## bedrooms                                                                         bedrooms
## off                                                                                   off
## sofa                                                                                 sofa
## manly                                                                               manly
## minutes                                                                           minutes
## space                                                                               space
## work                                                                                 work
## stays                                                                               stays
## cleaning                                                                         cleaning
## lovely                                                                             lovely
## things                                                                             things
## shower                                                                             shower
## many                                                                                 many
## minute                                                                             minute
## stay                                                                                 stay
## ocean                                                                               ocean
## location                                                                         location
## home                                                                                 home
## views                                                                               views
## studio                                                                             studio
## easy                                                                                 easy
## equipped                                                                         equipped
## laundry                                                                           laundry
## host_has_profile_pic                                                 host_has_profile_pic
## host_identity_verified                                             host_identity_verified
## room_type                                                                       room_type
## host_total_listings_count                                       host_total_listings_count
## minimum_nights                                                             minimum_nights
## maximum_maximum_nights                                             maximum_maximum_nights
## maximum_nights_avg_ntm                                             maximum_nights_avg_ntm
## review_scores_checkin                                               review_scores_checkin
## review_scores_communication                                   review_scores_communication
## review_scores_location                                             review_scores_location
## review_scores_value                                                   review_scores_value
## calculated_host_listings_count_shared_rooms   calculated_host_listings_count_shared_rooms
## after                                                                               after
## airport                                                                           airport
## airy                                                                                 airy
## also                                                                                 also
## amenities                                                                       amenities
## any                                                                                   any
## appliances                                                                     appliances
## area                                                                                 area
## areas                                                                               areas
## around                                                                             around
## australia                                                                       australia
## back                                                                                 back
## bars                                                                                 bars
## basics                                                                             basics
## bath                                                                                 bath
## bathrooms                                                                       bathrooms
## bathtub                                                                           bathtub
## bedrooms.1                                                                     bedrooms.1
## been                                                                                 been
## before                                                                             before
## big                                                                                   big
## block                                                                               block
## books                                                                               books
## both                                                                                 both
## brand                                                                               brand
## breakfast                                                                       breakfast
## building                                                                         building
## built                                                                               built
## business                                                                         business
## but                                                                                   but
## cable                                                                               cable
## cafe                                                                                 cafe
## car                                                                                   car
## carbon                                                                             carbon
## cbd                                                                                   cbd
## central                                                                           central
## centre                                                                             centre
## children                                                                         children
## clean                                                                               clean
## close                                                                               close
## comfortable                                                                   comfortable
## comfy                                                                               comfy
## connection                                                                     connection
## convenient                                                                     convenient
## coogee                                                                             coogee
## cooking                                                                           cooking
## cosy                                                                                 cosy
## couple                                                                             couple
## couples                                                                           couples
## courtyard                                                                       courtyard
## darkening                                                                       darkening
## day                                                                                   day
## deck                                                                                 deck
## dining                                                                             dining
## dishes                                                                             dishes
## distance                                                                         distance
## down                                                                                 down
## downstairs                                                                     downstairs
## drive                                                                               drive
## ensuite                                                                           ensuite
## entire                                                                             entire
## entrance                                                                         entrance
## essentials                                                                     essentials
## etc                                                                                   etc
## ethernet                                                                         ethernet
## facebook                                                                         facebook
## facilities                                                                     facilities
## families                                                                         families
## family                                                                             family
## famous                                                                             famous
## features                                                                         features
## feel                                                                                 feel
## ferry                                                                               ferry
## few                                                                                   few
## filled                                                                             filled
## fire                                                                                 fire
## fireplace                                                                       fireplace
## first                                                                               first
## flat                                                                                 flat
## fridge                                                                             fridge
## friendly                                                                         friendly
## full                                                                                 full
## fully                                                                               fully
## furnished                                                                       furnished
## gas                                                                                   gas
## gel                                                                                   gel
## get                                                                                   get
## good                                                                                 good
## greets                                                                             greets
## grill                                                                               grill
## gym                                                                                   gym
## hair                                                                                 hair
## hangers                                                                           hangers
## heating                                                                           heating
## high                                                                                 high
## hills                                                                               hills
## holiday                                                                           holiday
## host                                                                                 host
## hotel                                                                               hotel
## huge                                                                                 huge
## ideal                                                                               ideal
## includes                                                                         includes
## including                                                                       including
## indoor                                                                             indoor
## internal                                                                         internal
## into                                                                                 into
## iron                                                                                 iron
## its                                                                                   its
## jumio                                                                               jumio
## junction                                                                         junction
## kit                                                                                   kit
## laptop                                                                             laptop
## leafy                                                                               leafy
## linen                                                                               linen
## local                                                                               local
## lock                                                                                 lock
## long                                                                                 long
## looking                                                                           looking
## lots                                                                                 lots
## lounge                                                                             lounge
## love                                                                                 love
## luggage                                                                           luggage
## luxury                                                                             luxury
## machine                                                                           machine
## main                                                                                 main
## make                                                                                 make
## master                                                                             master
## monoxide                                                                         monoxide
## more                                                                                 more
## most                                                                                 most
## natural                                                                           natural
## near                                                                                 near
## need                                                                                 need
## netflix                                                                           netflix
## newly                                                                               newly
## next.                                                                               next.
## nice                                                                                 nice
## night                                                                               night
## north                                                                               north
## offers                                                                             offers
## only                                                                                 only
## open                                                                                 open
## outdoor                                                                           outdoor
## outside                                                                           outside
## parks                                                                               parks
## perfect                                                                           perfect
## phone                                                                               phone
## pillows                                                                           pillows
## plan                                                                                 plan
## play                                                                                 play
## plenty                                                                             plenty
## pool                                                                                 pool
## property                                                                         property
## provided                                                                         provided
## public                                                                             public
## quality                                                                           quality
## relax                                                                               relax
## relaxing                                                                         relaxing
## renovated                                                                       renovated
## road                                                                                 road
## second                                                                             second
## secure                                                                             secure
## self                                                                                 self
## set                                                                                   set
## shades                                                                             shades
## shampoo                                                                           shampoo
## share                                                                               share
## shared                                                                             shared
## shopping                                                                         shopping
## shops                                                                               shops
## short                                                                               short
## silverware                                                                     silverware
## sized                                                                               sized
## small                                                                               small
## smoke                                                                               smoke
## some                                                                                 some
## south                                                                               south
## stove                                                                               stove
## stroll                                                                             stroll
## stunning                                                                         stunning
## style                                                                               style
## stylish                                                                           stylish
## suite                                                                               suite
## summer                                                                             summer
## sun                                                                                   sun
## sunny                                                                               sunny
## surry                                                                               surry
## swimming                                                                         swimming
## table                                                                               table
## take                                                                                 take
## term                                                                                 term
## than                                                                                 than
## three                                                                               three
## throughout                                                                     throughout
## time                                                                                 time
## towels                                                                             towels
## toys                                                                                 toys
## transport                                                                       transport
## tub                                                                                   tub
## two                                                                                   two
## u2019n                                                                             u2019n
## u2019s                                                                             u2019s
## unit                                                                                 unit
## use                                                                                   use
## utensils                                                                         utensils
## view                                                                                 view
## village                                                                           village
## wales                                                                               wales
## walking                                                                           walking
## want                                                                                 want
## wardrobe                                                                         wardrobe
## washer                                                                             washer
## welcome                                                                           welcome
## well                                                                                 well
## where                                                                               where
## while.                                                                             while.
## who                                                                                   who
## whole                                                                               whole
## will                                                                                 will
## workspace                                                                       workspace
##                                                   rel.inf
## reviews_per_month                            24.613502968
## neighbourhood_cleansed                       12.265933934
## reviews                                       8.560103762
## number_of_reviews_ltm                         5.357568389
## availability_365                              4.253743060
## longitude                                     1.623172228
## selfie                                        1.508859783
## city                                          1.460222682
## review_scores_rating                          1.247490343
## availability_90                               1.236219765
## one                                           1.118431715
## art                                           1.033383759
## mins                                          0.971954496
## availability_30                               0.907957939
## bondi                                         0.900262822
## street                                        0.877715179
## people                                        0.812121710
## price                                         0.700881391
## identity                                      0.672910578
## the                                           0.662803331
## best                                          0.621309429
## bedroom                                       0.602930074
## apartment                                     0.593157924
## availability_60                               0.572817872
## dishwasher                                    0.566341057
## quiet                                         0.551939112
## calculated_host_listings_count_private_rooms  0.547608596
## number_of_reviews_l30d                        0.546756916
## kitchen                                       0.546745036
## guest                                         0.536979226
## spacious                                      0.506771380
## government                                    0.503765608
## maximum_nights                                0.497954753
## dryer                                         0.461363929
## backyard                                      0.445761349
## within                                        0.433153558
## latitude                                      0.426932380
## elevator                                      0.421488436
## bus                                           0.391377286
## darling                                       0.389180233
## minimum_nights_avg_ntm                        0.374192172
## water                                         0.368655714
## calculated_host_listings_count_entire_homes   0.361576176
## internet                                      0.344472535
## please                                        0.338026824
## your                                          0.310810038
## living                                        0.299216898
## bathroom                                      0.271929309
## all                                           0.270051193
## and                                           0.269989723
## maximum_minimum_nights                        0.266813965
## has                                           0.266114731
## top                                           0.265078309
## parking                                       0.264764578
## just                                          0.262840214
## door                                          0.258559637
## washing                                       0.251099291
## not                                           0.245278860
## other                                         0.245207905
## walk                                          0.239775220
## place                                         0.233867292
## room                                          0.229530763
## large                                         0.228765327
## stop                                          0.226518880
## there                                         0.220734646
## min                                           0.219608515
## with                                          0.218409940
## balcony                                       0.215668084
## cafes                                         0.213564642
## private                                       0.211528121
## maker                                         0.201868803
## instant_bookable                              0.201801971
## minimum_minimum_nights                        0.200552801
## heart                                         0.200283598
## terrace                                       0.196308835
## access                                        0.195995696
## free                                          0.192851668
## linens                                        0.190352385
## beds                                          0.189719041
## note                                          0.184010260
## conditioning                                  0.174968778
## you                                           0.174176862
## everything                                    0.174008660
## for.                                          0.159275082
## house                                         0.158654590
## guests                                        0.157091248
## alarm                                         0.156422459
## away                                          0.155452658
## travel                                        0.153770328
## amazing                                       0.151041946
## restaurants                                   0.149154980
## sydney                                        0.146412707
## hot                                           0.145216010
## wifi                                          0.143995686
## that                                          0.141876710
## over                                          0.141219781
## right                                         0.137663449
## manual                                        0.134973884
## size                                          0.134429080
## out                                           0.128180426
## ground                                        0.126768625
## great                                         0.125192742
## calculated_host_listings_count                0.123953405
## buses                                         0.123743888
## host_listings_count                           0.122646182
## modern                                        0.119245777
## beach                                         0.119153122
## accommodates                                  0.117324139
## microwave                                     0.114674698
## very                                          0.113721757
## single                                        0.111492474
## double                                        0.111104354
## offer                                         0.107096570
## lockbox                                       0.106853067
## have                                          0.106157907
## king                                          0.105649837
## extra                                         0.105601234
## our                                           0.105538462
## this                                          0.104100850
## from                                          0.103952701
## like                                          0.103673118
## located                                       0.100630456
## station                                       0.094136552
## queen                                         0.093478042
## park                                          0.092687337
## premises                                      0.091224849
## host_is_superhost                             0.086906712
## review_scores_cleanliness                     0.086487433
## rooms                                         0.086426433
## blankets                                      0.084698169
## email                                         0.084143306
## unlimited                                     0.084085325
## bay                                           0.084083250
## level                                         0.082705929
## beaches                                       0.081180534
## are                                           0.081006391
## situated                                      0.077536782
## coffee                                        0.077508954
## garden                                        0.074978400
## offline                                       0.074687471
## beautiful                                     0.073126592
## minimum_maximum_nights                        0.071464930
## paid                                          0.071000392
## aid                                           0.068752815
## allowed                                       0.068396547
## bathclass                                     0.068148023
## new                                           0.066885436
## bbq                                           0.065877101
## dropoff                                       0.063190317
## light                                         0.060163432
## harbour                                       0.058962693
## bathsum                                       0.057892041
## patio                                         0.054864501
## own                                           0.054280857
## oven                                          0.050163737
## can                                           0.050115263
## floor                                         0.048464846
## plus                                          0.045951253
## beds.1                                        0.044193785
## train                                         0.043100054
## crib                                          0.041087411
## bed                                           0.039789714
## pack                                          0.039680673
## enjoy                                         0.038381695
## peaceful                                      0.036718314
## extinguisher                                  0.036646199
## bright                                        0.036001845
## refrigerator                                  0.035837685
## review_scores_accuracy                        0.035588748
## which                                         0.035470624
## across                                        0.034347017
## separate                                      0.031553276
## chair                                         0.031170407
## tea                                           0.029782335
## front                                         0.028914750
## available                                     0.028555134
## air                                           0.026182828
## bedrooms                                      0.025613137
## off                                           0.024917679
## sofa                                          0.024152449
## manly                                         0.022493610
## minutes                                       0.019576328
## space                                         0.019507136
## work                                          0.018899655
## stays                                         0.018608734
## cleaning                                      0.018499808
## lovely                                        0.018259539
## things                                        0.018227699
## shower                                        0.014635440
## many                                          0.011675872
## minute                                        0.011571822
## stay                                          0.011254694
## ocean                                         0.009773319
## location                                      0.006427817
## home                                          0.005306123
## views                                         0.004747560
## studio                                        0.004670207
## easy                                          0.003812323
## equipped                                      0.003579804
## laundry                                       0.002894869
## host_has_profile_pic                          0.000000000
## host_identity_verified                        0.000000000
## room_type                                     0.000000000
## host_total_listings_count                     0.000000000
## minimum_nights                                0.000000000
## maximum_maximum_nights                        0.000000000
## maximum_nights_avg_ntm                        0.000000000
## review_scores_checkin                         0.000000000
## review_scores_communication                   0.000000000
## review_scores_location                        0.000000000
## review_scores_value                           0.000000000
## calculated_host_listings_count_shared_rooms   0.000000000
## after                                         0.000000000
## airport                                       0.000000000
## airy                                          0.000000000
## also                                          0.000000000
## amenities                                     0.000000000
## any                                           0.000000000
## appliances                                    0.000000000
## area                                          0.000000000
## areas                                         0.000000000
## around                                        0.000000000
## australia                                     0.000000000
## back                                          0.000000000
## bars                                          0.000000000
## basics                                        0.000000000
## bath                                          0.000000000
## bathrooms                                     0.000000000
## bathtub                                       0.000000000
## bedrooms.1                                    0.000000000
## been                                          0.000000000
## before                                        0.000000000
## big                                           0.000000000
## block                                         0.000000000
## books                                         0.000000000
## both                                          0.000000000
## brand                                         0.000000000
## breakfast                                     0.000000000
## building                                      0.000000000
## built                                         0.000000000
## business                                      0.000000000
## but                                           0.000000000
## cable                                         0.000000000
## cafe                                          0.000000000
## car                                           0.000000000
## carbon                                        0.000000000
## cbd                                           0.000000000
## central                                       0.000000000
## centre                                        0.000000000
## children                                      0.000000000
## clean                                         0.000000000
## close                                         0.000000000
## comfortable                                   0.000000000
## comfy                                         0.000000000
## connection                                    0.000000000
## convenient                                    0.000000000
## coogee                                        0.000000000
## cooking                                       0.000000000
## cosy                                          0.000000000
## couple                                        0.000000000
## couples                                       0.000000000
## courtyard                                     0.000000000
## darkening                                     0.000000000
## day                                           0.000000000
## deck                                          0.000000000
## dining                                        0.000000000
## dishes                                        0.000000000
## distance                                      0.000000000
## down                                          0.000000000
## downstairs                                    0.000000000
## drive                                         0.000000000
## ensuite                                       0.000000000
## entire                                        0.000000000
## entrance                                      0.000000000
## essentials                                    0.000000000
## etc                                           0.000000000
## ethernet                                      0.000000000
## facebook                                      0.000000000
## facilities                                    0.000000000
## families                                      0.000000000
## family                                        0.000000000
## famous                                        0.000000000
## features                                      0.000000000
## feel                                          0.000000000
## ferry                                         0.000000000
## few                                           0.000000000
## filled                                        0.000000000
## fire                                          0.000000000
## fireplace                                     0.000000000
## first                                         0.000000000
## flat                                          0.000000000
## fridge                                        0.000000000
## friendly                                      0.000000000
## full                                          0.000000000
## fully                                         0.000000000
## furnished                                     0.000000000
## gas                                           0.000000000
## gel                                           0.000000000
## get                                           0.000000000
## good                                          0.000000000
## greets                                        0.000000000
## grill                                         0.000000000
## gym                                           0.000000000
## hair                                          0.000000000
## hangers                                       0.000000000
## heating                                       0.000000000
## high                                          0.000000000
## hills                                         0.000000000
## holiday                                       0.000000000
## host                                          0.000000000
## hotel                                         0.000000000
## huge                                          0.000000000
## ideal                                         0.000000000
## includes                                      0.000000000
## including                                     0.000000000
## indoor                                        0.000000000
## internal                                      0.000000000
## into                                          0.000000000
## iron                                          0.000000000
## its                                           0.000000000
## jumio                                         0.000000000
## junction                                      0.000000000
## kit                                           0.000000000
## laptop                                        0.000000000
## leafy                                         0.000000000
## linen                                         0.000000000
## local                                         0.000000000
## lock                                          0.000000000
## long                                          0.000000000
## looking                                       0.000000000
## lots                                          0.000000000
## lounge                                        0.000000000
## love                                          0.000000000
## luggage                                       0.000000000
## luxury                                        0.000000000
## machine                                       0.000000000
## main                                          0.000000000
## make                                          0.000000000
## master                                        0.000000000
## monoxide                                      0.000000000
## more                                          0.000000000
## most                                          0.000000000
## natural                                       0.000000000
## near                                          0.000000000
## need                                          0.000000000
## netflix                                       0.000000000
## newly                                         0.000000000
## next.                                         0.000000000
## nice                                          0.000000000
## night                                         0.000000000
## north                                         0.000000000
## offers                                        0.000000000
## only                                          0.000000000
## open                                          0.000000000
## outdoor                                       0.000000000
## outside                                       0.000000000
## parks                                         0.000000000
## perfect                                       0.000000000
## phone                                         0.000000000
## pillows                                       0.000000000
## plan                                          0.000000000
## play                                          0.000000000
## plenty                                        0.000000000
## pool                                          0.000000000
## property                                      0.000000000
## provided                                      0.000000000
## public                                        0.000000000
## quality                                       0.000000000
## relax                                         0.000000000
## relaxing                                      0.000000000
## renovated                                     0.000000000
## road                                          0.000000000
## second                                        0.000000000
## secure                                        0.000000000
## self                                          0.000000000
## set                                           0.000000000
## shades                                        0.000000000
## shampoo                                       0.000000000
## share                                         0.000000000
## shared                                        0.000000000
## shopping                                      0.000000000
## shops                                         0.000000000
## short                                         0.000000000
## silverware                                    0.000000000
## sized                                         0.000000000
## small                                         0.000000000
## smoke                                         0.000000000
## some                                          0.000000000
## south                                         0.000000000
## stove                                         0.000000000
## stroll                                        0.000000000
## stunning                                      0.000000000
## style                                         0.000000000
## stylish                                       0.000000000
## suite                                         0.000000000
## summer                                        0.000000000
## sun                                           0.000000000
## sunny                                         0.000000000
## surry                                         0.000000000
## swimming                                      0.000000000
## table                                         0.000000000
## take                                          0.000000000
## term                                          0.000000000
## than                                          0.000000000
## three                                         0.000000000
## throughout                                    0.000000000
## time                                          0.000000000
## towels                                        0.000000000
## toys                                          0.000000000
## transport                                     0.000000000
## tub                                           0.000000000
## two                                           0.000000000
## u2019n                                        0.000000000
## u2019s                                        0.000000000
## unit                                          0.000000000
## use                                           0.000000000
## utensils                                      0.000000000
## view                                          0.000000000
## village                                       0.000000000
## wales                                         0.000000000
## walking                                       0.000000000
## want                                          0.000000000
## wardrobe                                      0.000000000
## washer                                        0.000000000
## welcome                                       0.000000000
## well                                          0.000000000
## where                                         0.000000000
## while.                                        0.000000000
## who                                           0.000000000
## whole                                         0.000000000
## will                                          0.000000000
## workspace                                     0.000000000
# Relative influence of every predictor in the boosted model, evaluated at the
# selected number of trees (bestntrees). The result has one row per predictor
# with the columns `var` and `rel.inf`.
dfht <- summary(modelgbm, n.trees = bestntrees)

# Independent variable importance drawing: horizontal bar chart of the first 30
# rows of the influence table. head() is used instead of dfht[1:30, ] so that a
# model with fewer than 30 predictors does not get padded with all-NA rows.
ggplot(data = head(dfht, 30), aes(x = reorder(var, rel.inf), y = rel.inf)) +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip()

# Print the first 40 rows of the influence table.
head(dfht, 40)
##                                                                                       var
## reviews_per_month                                                       reviews_per_month
## neighbourhood_cleansed                                             neighbourhood_cleansed
## reviews                                                                           reviews
## number_of_reviews_ltm                                               number_of_reviews_ltm
## availability_365                                                         availability_365
## longitude                                                                       longitude
## selfie                                                                             selfie
## city                                                                                 city
## review_scores_rating                                                 review_scores_rating
## availability_90                                                           availability_90
## one                                                                                   one
## art                                                                                   art
## mins                                                                                 mins
## availability_30                                                           availability_30
## bondi                                                                               bondi
## street                                                                             street
## people                                                                             people
## price                                                                               price
## identity                                                                         identity
## the                                                                                   the
## best                                                                                 best
## bedroom                                                                           bedroom
## apartment                                                                       apartment
## availability_60                                                           availability_60
## dishwasher                                                                     dishwasher
## quiet                                                                               quiet
## calculated_host_listings_count_private_rooms calculated_host_listings_count_private_rooms
## number_of_reviews_l30d                                             number_of_reviews_l30d
## kitchen                                                                           kitchen
## guest                                                                               guest
## spacious                                                                         spacious
## government                                                                     government
## maximum_nights                                                             maximum_nights
## dryer                                                                               dryer
## backyard                                                                         backyard
## within                                                                             within
## latitude                                                                         latitude
## elevator                                                                         elevator
## bus                                                                                   bus
## darling                                                                           darling
##                                                 rel.inf
## reviews_per_month                            24.6135030
## neighbourhood_cleansed                       12.2659339
## reviews                                       8.5601038
## number_of_reviews_ltm                         5.3575684
## availability_365                              4.2537431
## longitude                                     1.6231722
## selfie                                        1.5088598
## city                                          1.4602227
## review_scores_rating                          1.2474903
## availability_90                               1.2362198
## one                                           1.1184317
## art                                           1.0333838
## mins                                          0.9719545
## availability_30                               0.9079579
## bondi                                         0.9002628
## street                                        0.8777152
## people                                        0.8121217
## price                                         0.7008814
## identity                                      0.6729106
## the                                           0.6628033
## best                                          0.6213094
## bedroom                                       0.6029301
## apartment                                     0.5931579
## availability_60                               0.5728179
## dishwasher                                    0.5663411
## quiet                                         0.5519391
## calculated_host_listings_count_private_rooms  0.5476086
## number_of_reviews_l30d                        0.5467569
## kitchen                                       0.5467450
## guest                                         0.5369792
## spacious                                      0.5067714
## government                                    0.5037656
## maximum_nights                                0.4979548
## dryer                                         0.4613639
## backyard                                      0.4457613
## within                                        0.4331536
## latitude                                      0.4269324
## elevator                                      0.4214884
## bus                                           0.3913773
## darling                                       0.3891802
# Score the held-out set with the boosted model at the selected number of
# trees. No `type` is passed, so predict() returns its default scale; the
# negative cut-off reported further down shows these scores are not
# probabilities (presumably the link / log-odds scale -- TODO confirm).
pretest <- predict(modelgbm, newdata = dftest, n.trees = bestntrees)
library(pROC)
## Warning: package 'pROC' was built under R version 3.6.3
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Build and plot the test-set ROC curve (AUC printed on the plot).
# Class order and direction are stated explicitly instead of being
# auto-detected: they match what pROC chose in the recorded run
# (control = 0, case = 1, controls < cases), so the result is unchanged, but
# the direction can no longer flip silently and the "Setting levels" /
# "Setting direction" messages are no longer emitted.
roc1 <- roc(
  dftest$istop100, pretest,
  levels = c(0, 1), direction = "<",
  print.auc = TRUE, plot = TRUE
)

# Pick the "best" operating point on the ROC curve (pROC's default criterion
# for "best" is Youden's J) and evaluate the resulting classifier.
# coords() is called once, with `ret` and `transpose` spelled out, so the
# result has a fixed shape/column set across pROC versions and the
# 'transpose' warning is not raised.
best_cut <- coords(
  roc1, "best",
  ret = c("threshold", "specificity", "sensitivity"),
  transpose = FALSE
)
# "best" may return several tied thresholds; keep the first, as before.
bestdd <- best_cut[1, "threshold"]
# NOTE(review): the cut-off is tuned on the same test set it is evaluated on
# (roc1 is built from dftest), so the accuracy below is likely optimistic --
# consider choosing the threshold on a validation split instead.
# Both factors carry the full 0/1 level set so the table is always 2 x 2,
# even if every prediction falls on one side of the cut-off; otherwise
# diag() would not pick out the correctly classified cells.
ma <- table(
  factor(dftest$istop100, levels = c(0, 1)),
  factor(ifelse(pretest > bestdd, 1, 0), levels = c(0, 1))
)
# Rows: actual istop100; columns: predicted class.
ma
##    
##        0    1
##   0 3778  270
##   1    1   39
# Overall accuracy = correctly classified / total.
sum(diag(ma)) / sum(ma)
## [1] 0.9337084
# Threshold with its specificity and sensitivity.
best_cut
##   threshold specificity sensitivity
## 1 -6.226547   0.9333004       0.975